{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this notebook, we'll create a framework for image classification in Tensorflow.\n",
    "\n",
    "This code should be replicable to any image task with a few changes.\n",
    "\n",
    "Our specific task will be to classify images of traffic lights as red, yellow, or green."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#!/usr/bin/python2\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "import sys, os, time\n",
    "import itertools\n",
    "import math, random\n",
    "import glob\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import cv2\n",
    "from sklearn.metrics import confusion_matrix\n",
    "import matplotlib.pyplot as plt\n",
    "from IPython.display import Image, display\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First, we set some basic parameters:\n",
    "\n",
    "    base_image_path: the path where our training data is stored\n",
    "    \n",
    "    image_types: subdirectories in the images folder, each one representing a different class\n",
    "    \n",
    "    input_img_x/y: width and height of the images\n",
    "    \n",
    "    train_test_split_ratio: the ratio of training images to testing images\n",
    "    \n",
    "    batch_size: the minibatch size\n",
    "    \n",
    "    checkpoint_name: where we will save our best model\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Basic parameters\n",
    "\n",
    "max_epochs = 25\n",
    "base_image_path = \"5_tensorflow_traffic_light_images/\"\n",
    "image_types = [\"red\", \"green\", \"yellow\"]\n",
    "input_img_x = 32\n",
    "input_img_y = 32\n",
    "train_test_split_ratio = 0.9\n",
    "batch_size = 32\n",
    "checkpoint_name = \"model.ckpt\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Helper layer functions\n",
    "def weight_variable(shape):\n",
    "    initial = tf.truncated_normal(shape, stddev=0.1)\n",
    "    return tf.Variable(initial)\n",
    "\n",
    "def bias_variable(shape):\n",
    "    initial = tf.constant(0.1, shape=shape)\n",
    "    return tf.Variable(initial)\n",
    "\n",
    "def conv2d(x, W, stride):\n",
    "    return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME')\n",
    "\n",
    "def max_pool_2x2(x):\n",
    "    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],\n",
    "                        strides=[1, 2, 2, 1], padding='SAME')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we initialize our input and output neurons.\n",
    "\n",
    "Our input neurons will be the shape of the image which is (32 x 32 x 3)\n",
    "\n",
    "Because our data will be one-hot encoded, we have as many output neurons as we have classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Model\n",
    "\n",
    "x = tf.placeholder(tf.float32, shape=[None, input_img_x, input_img_y, 3])\n",
    "y_ = tf.placeholder(tf.float32, shape=[None, len(image_types)])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is where we specify our first convolutional layers.\n",
    "\n",
    "We specify the number of weights in the first line:\n",
    "\n",
    "    W_conv1 = weight_variable([3, 3, 3, 16])\n",
    "    \n",
    "This line is for specifying the number of bias variables, or the variables that will be added to weights after multiplying them by the activation.\n",
    "\n",
    "    b_conv1 = bias_variable([16])\n",
    "    \n",
    "Next, we specify the activation:\n",
    "\n",
    "    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1, 1) + b_conv1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x_image = x\n",
    "\n",
    "# Our first three convolutional layers, of 16 3x3 filters\n",
    "W_conv1 = weight_variable([3, 3, 3, 16])\n",
    "b_conv1 = bias_variable([16])\n",
    "h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1, 1) + b_conv1)\n",
    "\n",
    "W_conv2 = weight_variable([3, 3, 16, 16])\n",
    "b_conv2 = bias_variable([16])\n",
    "h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2, 1) + b_conv2)\n",
    "\n",
    "W_conv3 = weight_variable([3, 3, 16, 16])\n",
    "b_conv3 = bias_variable([16])\n",
    "h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 1) + b_conv3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Our pooling layer\n",
    "\n",
    "h_pool4 = max_pool_2x2(h_conv3)\n",
    "\n",
    "n1, n2, n3, n4 = h_pool4.get_shape().as_list()\n",
    "\n",
    "W_fc1 = weight_variable([n2*n3*n4, 3])\n",
    "b_fc1 = bias_variable([3])\n",
    "\n",
    "# We flatten our pool layer into a fully connected layer\n",
    "\n",
    "h_pool4_flat = tf.reshape(h_pool4, [-1, n2*n3*n4])\n",
    "\n",
    "y = tf.matmul(h_pool4_flat, W_fc1) + b_fc1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sess = tf.InteractiveSession()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Our loss function is defined as computing softmax, and then cross entropy.\n",
    "\n",
    "We also specify our optimizer, which takes a learning rate, and a loss function.\n",
    "\n",
    "Finally, we initialize all of our variables which will tell us if our model is valid."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Our loss function and optimizer\n",
    "\n",
    "loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_))\n",
    "train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)\n",
    "sess.run(tf.initialize_all_variables())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We need to load in our images.  We do so using OpenCV's imread function.  After loading in each image, we resize it to our input size.\n",
    "\n",
    "With each loaded image, we also specify the expected output.  For this, we use a one-hot encoding, creating an array of zeros represnting each class, and setting the index of the expected class number to 1.\n",
    "\n",
    "For example, if we have three classes, and we expect an order of: [red neuron, green neuron, yellow neuron]\n",
    "\n",
    "We initialize an array to [0, 0, 0] and if we loaded a yellow light, we change the last value to 1: [0, 0, 1]\n",
    "\n",
    "Finally, we shuffle our dataset.  (It's generally useful to seed our random generator with 0 at the start of the program)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "saver = tf.train.Saver()\n",
    "time_start = time.time()\n",
    "\n",
    "v_loss = least_loss = 99999999\n",
    "\n",
    "# Load data\n",
    "\n",
    "full_set = []\n",
    "\n",
    "for im_type in image_types:\n",
    "    for ex in glob.glob(os.path.join(base_image_path, im_type, \"*\")):\n",
    "        im = cv2.imread(ex)\n",
    "        if not im is None:\n",
    "            im = cv2.resize(im, (32, 32))\n",
    "\n",
    "            # Create an array representing our classes and set it\n",
    "            one_hot_array = [0] * len(image_types)\n",
    "            one_hot_array[image_types.index(im_type)] = 1\n",
    "            assert(im.shape == (32, 32, 3))\n",
    "\n",
    "            full_set.append((im, one_hot_array, ex))\n",
    "\n",
    "random.shuffle(full_set)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using our train_test_split_ratio we create two lists of examples: testing and training."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# We split our data into a training and test set here\n",
    "\n",
    "split_index = int(math.floor(len(full_set) * train_test_split_ratio))\n",
    "train_set = full_set[:split_index]\n",
    "test_set = full_set[split_index:]\n",
    "\n",
    "# We ensure that our training and test sets are a multiple of batch size\n",
    "train_set_offset = len(train_set) % batch_size\n",
    "test_set_offset = len(test_set) % batch_size\n",
    "train_set = train_set[: len(train_set) - train_set_offset]\n",
    "test_set = test_set[: len(test_set) - test_set_offset]\n",
    "\n",
    "train_x, train_y, train_z = zip(*train_set)\n",
    "test_x, test_y, test_z = zip(*test_set)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Every time we iterate over all of our training examples, we have completed one epoch.  Generally, we should start with one epoch while debugging, and in practice many datasets will converge with less than 100 epochs.  It's something that needs to be explored with each dataset.\n",
    "\n",
    "We split our training set into batches, which we train on in order.\n",
    "\n",
    "We then use our entire datset to calculate training and validation loss.  These are the values we want to minimize, but it's important to pay attention to the interaction between them.  If training loss is going down, but validation is staying the same, it means we are overfitting our dataset: our network is becoming increasingly good at correctly classifying our training examples, but our network isn't generalizing to other examples outside the training set.\n",
    "\n",
    "We save our model if the current model has the lowest validation seen in this training run."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(\"Starting training... [{} training examples]\".format(len(train_x)))\n",
    "\n",
    "v_loss = 9999999\n",
    "train_loss = []\n",
    "val_loss = []\n",
    "\n",
    "for i in range(0, max_epochs):\n",
    "\n",
    "    # Iterate over our training set\n",
    "    for tt in range(0, (len(train_x) / batch_size)):\n",
    "        start_batch = batch_size * tt\n",
    "        end_batch = batch_size * (tt + 1)\n",
    "        train_step.run(feed_dict={x: train_x[start_batch:end_batch], y_: train_y[start_batch:end_batch]})\n",
    "        ex_seen = \"Current epoch, examples seen: {:20} / {} \\r\".format(tt * batch_size, len(train_x))\n",
    "        sys.stdout.write(ex_seen.format(tt * batch_size))\n",
    "        sys.stdout.flush()\n",
    "\n",
    "    ex_seen = \"Current epoch, examples seen: {:20} / {} \\r\".format((tt + 1) * batch_size, len(train_x))\n",
    "    sys.stdout.write(ex_seen.format(tt * batch_size))\n",
    "    sys.stdout.flush()\n",
    "\n",
    "    t_loss = loss.eval(feed_dict={x: train_x, y_: train_y})\n",
    "    v_loss = loss.eval(feed_dict={x: test_x, y_: test_y})\n",
    "    \n",
    "    train_loss.append(t_loss)\n",
    "    val_loss.append(v_loss)\n",
    "\n",
    "    sys.stdout.write(\"Epoch {:5}: loss: {:15.10f}, val. loss: {:15.10f}\".format(i + 1, t_loss, v_loss))\n",
    "\n",
    "    if v_loss < least_loss:\n",
    "        sys.stdout.write(\", saving new best model to {}\".format(checkpoint_name))\n",
    "        least_loss = v_loss\n",
    "        filename = saver.save(sess, checkpoint_name)\n",
    "\n",
    "    sys.stdout.write(\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "plt.figure()\n",
    "plt.xticks(np.arange(0, len(train_loss), 1.0))\n",
    "plt.ylabel(\"Loss\")\n",
    "plt.xlabel(\"Epochs\")\n",
    "train_line = plt.plot(range(0, len(train_loss)), train_loss, 'r', label=\"Train loss\")\n",
    "val_line = plt.plot(range(0, len(val_loss)), val_loss, 'g', label=\"Validation loss\")\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's print the examples from our test set that were wrong:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "zipped_x_y = zip(test_x, test_y)\n",
    "conf_true = []\n",
    "conf_pred = []\n",
    "for tt in range(0, len(zipped_x_y)):\n",
    "    q = zipped_x_y[tt]\n",
    "    sfmax = list(sess.run(tf.nn.softmax(y.eval(feed_dict={x: [q[0]]})))[0])\n",
    "    sf_ind = sfmax.index(max(sfmax))\n",
    "    \n",
    "    predicted_label = image_types[sf_ind]\n",
    "    actual_label = image_types[q[1].index(max(q[1]))]\n",
    "    \n",
    "    conf_true.append(actual_label)\n",
    "    conf_pred.append(predicted_label)\n",
    "    \n",
    "    if predicted_label != actual_label:\n",
    "        print(\"Actual: {}, predicted: {}\".format(actual_label, predicted_label))\n",
    "        img_path = test_z[tt]    \n",
    "        ex_img = Image(filename=img_path)\n",
    "        display(ex_img)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# From sklearn docs\n",
    "def plot_confusion_matrix(cm, classes,\n",
    "                          normalize=False,\n",
    "                          title='Confusion matrix',\n",
    "                          cmap=plt.cm.Blues):\n",
    "    \"\"\"\n",
    "    This function prints and plots the confusion matrix.\n",
    "    Normalization can be applied by setting `normalize=True`.\n",
    "    \"\"\"\n",
    "    plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
    "    plt.title(title)\n",
    "    plt.colorbar()\n",
    "    tick_marks = np.arange(len(classes))\n",
    "    plt.xticks(tick_marks, classes, rotation=45)\n",
    "    plt.yticks(tick_marks, classes)\n",
    "\n",
    "    cm2 = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n",
    "\n",
    "    cm2 = np.around(cm2, 2)\n",
    "\n",
    "    thresh = cm.max() / 2.\n",
    "    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
    "        plt.text(j, i, str(cm[i, j]) + \" / \" + str(cm2[i, j]),\n",
    "                 horizontalalignment=\"center\",\n",
    "                 color=\"white\" if cm[i, j] > thresh else \"black\")\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.ylabel('True label')\n",
    "    plt.xlabel('Predicted label')\n",
    "\n",
    "cnf_matrix = confusion_matrix(conf_true, conf_pred)\n",
    "plt.figure()\n",
    "plot_confusion_matrix(cnf_matrix, classes=image_types, normalize=False,\n",
    "                      title='Normalized confusion matrix')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
